library(MASS)
library(ggplot2)
library(stringr)
library(tidyr)
library(plyr)
library(gridExtra)
library(dplyr)
library(stargazer)
library(margins)
library(reshape2)
library(zoo)
library(interactions)
library(lme4)


##############################################################


# Clear the workspace and fix the RNG seed. ls() omits dot-prefixed names by
# default, so rm(list = ls()) never removes .Random.seed; seeding before or
# after the clear leaves the same RNG state.
rm(list = ls())
set.seed(44)

# Every relative path below is resolved against this folder.
setwd("~/Dropbox/Selection Experiments Harvard/IO replication materials gender/")

# Load the combined sessions file.
sessions <- read.csv(file = "sessionscombined.csv")


###############################
### DATA PREP FOR ANALYSIS
###############################

# Split out the demographics: rows labelled "Demographics", restricted to the
# identifier and demographic columns listed in `dems`.
dems<-c("sessionno","Participant","age","education","gender","income","race")
sessions_demog <- sessions[ which(sessions$Stepgroup.Label == "Demographics"), c(which(colnames(sessions)%in%dems))]

# Data frame without the demographics rows or rows with an empty step-group label
sessions <- sessions[ which(sessions$Stepgroup.Label != "Demographics"), ]
sessions <- sessions[ which(sessions$Stepgroup.Label != ""), ]

# Convert prize to numeric. as.numeric() applied directly to a factor returns
# the internal level codes (1, 2, 3, ...) instead of the printed values, so a
# factor column is first converted to character. Numeric and character
# columns are passed to as.numeric() unchanged.
if (is.factor(sessions$prize)) {
  sessions$prize <- as.character(sessions$prize)
}
sessions$prize <- as.numeric(sessions$prize)

sessions$leader <- as.character(sessions$leader)

# Prize value by part/round: every row in a session x part x round group is
# assigned the largest non-missing prize recorded in that group.
sessions <- sessions %>% group_by(sessionno,Stepgroup.Label,Stepgroup.Loop) %>% dplyr::mutate(prize=max(prize, na.rm = TRUE))
sessions <- as.data.frame(sessions)


###	Merging in leaders

#	Splitting out the rows that just say who the leader was for a particular round
# Rows with an empty Participant and a non-empty Group are kept, reduced to
# the round identifiers plus Group and leader, then reshaped long to wide
# (one column per group). Columns G.3 through G.8 are dropped.
sessions_leaderids <- sessions[which(sessions$Participant == "" & sessions$Group != ""), ] %>%
  dplyr::select(c(sessionno, Stepgroup.Label, Stepgroup.Loop, Group, leader)) %>%
  spread(Group, leader) %>%
  dplyr::select(-c(G.3:G.8))

# Rename the two remaining group columns ahead of the merge
names(sessions_leaderids)[names(sessions_leaderids) == "G.1"] <- "leader_g1"
names(sessions_leaderids)[names(sessions_leaderids) == "G.2"] <- "leader_g2"

# Attach each round's group-1 and group-2 leader to every row of that round
sessions <- merge(
  x = sessions,
  y = sessions_leaderids,
  by = c("sessionno", "Stepgroup.Label", "Stepgroup.Loop")
)

#	Merging in the candidate counts by group
#	Splitting out the rows that just say how many candidates ran for a group in a certain round
# Same reshaping as for the leader ids, but carrying candidateCount: rows
# with an empty Participant and a non-empty Group, spread to one column per
# group, with G.3 through G.8 dropped.
candidate_count_subset <- sessions[which(sessions$Participant == "" & sessions$Group != ""), ] %>%
  dplyr::select(c(sessionno, Stepgroup.Label, Stepgroup.Loop, Group, candidateCount)) %>%
  spread(Group, candidateCount) %>%
  dplyr::select(-c(G.3:G.8))

# Rename the two remaining group columns ahead of the merge
names(candidate_count_subset)[names(candidate_count_subset) == "G.1"] <- "candidate_count_g1"
names(candidate_count_subset)[names(candidate_count_subset) == "G.2"] <- "candidate_count_g2"

# Attach each round's per-group candidate counts to every row of that round
sessions <- merge(
  x = sessions,
  y = candidate_count_subset,
  by = c("sessionno", "Stepgroup.Label", "Stepgroup.Loop")
)

#	Creating a variable isleader that = 1 for the two leaders each round
# isleader = 1 when the participant is the group-1 or group-2 leader of the
# round, and for every LotteryExperiment row.
sessions$isleader <- with(
  sessions,
  ifelse(
    Participant == leader_g1 | Participant == leader_g2 |
      Stepgroup.Label == "LotteryExperiment",
    1, 0
  )
)

# Check (printed): LotteryExperiment rows that are not flagged as leader
sessions %>% filter(Stepgroup.Label == "LotteryExperiment", isleader == 0)

# Recoding leader = 1 for part 2 (GroupExperiment), rounds 2/7/9
sessions$isleader[sessions$Stepgroup.Label == "GroupExperiment" & sessions$Stepgroup.Loop %in% c(2, 7, 9)] <- 1

# everelecleader = 1 on every row of a participant who was leader in at least
# one ElectionsandContest round (per-participant maximum of the round flag).
sessions$everelecleader <- with(
  sessions,
  ifelse(isleader == 1 & Stepgroup.Label == "ElectionsandContest", 1, 0)
)
sessions <- sessions %>%
  group_by(sessionno, Participant) %>%
  dplyr::mutate(everelecleader = max(everelecleader, na.rm = TRUE)) %>%
  as.data.frame()



#	Creating a variable that = 1 if the participant was a candidate in that round
# Round-level flags; each one is restricted to ElectionsandContest rows.
sessions$iscandidate <- with(
  sessions,
  ifelse(candidate == "yes" & Stepgroup.Label == "ElectionsandContest", 1, 0)
)

# Candidate in that round, but not the leader
sessions$islosingcandidate <- with(
  sessions,
  ifelse(candidate == "yes" & isleader == 0 & Stepgroup.Label == "ElectionsandContest", 1, 0)
)

# Explicitly not a candidate in that round
sessions$isnotcandidate <- with(
  sessions,
  ifelse(candidate == "no" & Stepgroup.Label == "ElectionsandContest", 1, 0)
)

# Participant-level summaries of iscandidate: evercandidate is the
# per-participant maximum (ever ran), countcandidate the per-participant sum
# (number of rounds in which they ran).
sessions$evercandidate <- with(
  sessions,
  ifelse(iscandidate == 1 & Stepgroup.Label == "ElectionsandContest", 1, 0)
)
sessions$countcandidate <- with(
  sessions,
  ifelse(iscandidate == 1 & Stepgroup.Label == "ElectionsandContest", 1, 0)
)
sessions <- sessions %>%
  group_by(sessionno, Participant) %>%
  dplyr::mutate(
    evercandidate = max(evercandidate, na.rm = TRUE),
    countcandidate = sum(countcandidate, na.rm = TRUE)
  ) %>%
  as.data.frame()


# Leader indicators split by part of the experiment
sessions$ispart3leader <- with(sessions, ifelse(Stepgroup.Label == "ElectionsandContest" & isleader == 1, 1, 0))
sessions$ispart2leader <- with(sessions, ifelse(Stepgroup.Label == "GroupExperiment" & isleader == 1, 1, 0))
sessions$ispart1leader <- with(sessions, ifelse(Stepgroup.Label == "LotteryExperiment" & isleader == 1, 1, 0))

# Low value round: prize strictly below 1000
sessions$islvr <- ifelse(sessions$prize < 1000, 1, 0)

# Bought exactly 1000 tickets
sessions$maxtickets <- ifelse(sessions$boughtTickets == 1000, 1, 0)

# Odd rounds: Stepgroup.Loop equal to one of 1, 3, 5, 7, 9, 11
sessions$isoddround <- ifelse(
  Reduce(`|`, lapply(c(1, 3, 5, 7, 9, 11), function(k) sessions$Stepgroup.Loop == k)),
  1, 0
)



### Outcome variables

# Effort as a share of the prize value
sessions$effort_pctprize <- sessions$boughtTickets / sessions$prize

# Running round counter: the loop number is used as-is except that
# GroupExperiment loops are offset by 12 and ElectionsandContest loops by 24.
sessions$roundnumber <- with(
  sessions,
  ifelse(
    Stepgroup.Label == "ElectionsandContest", Stepgroup.Loop + 24,
    ifelse(Stepgroup.Label == "GroupExperiment", Stepgroup.Loop + 12, Stepgroup.Loop)
  )
)

# One dummy per part
sessions$ispart3 <- ifelse(sessions$Stepgroup.Label == "ElectionsandContest", 1, 0)
sessions$ispart2 <- ifelse(sessions$Stepgroup.Label == "GroupExperiment", 1, 0)
sessions$ispart1 <- ifelse(sessions$Stepgroup.Label == "LotteryExperiment", 1, 0)

# Session-participant identifier of the form "s.<sessionno>.<Participant>",
# kept both as a string and as a factor
sessions$participantfe_string <- paste0("s.", as.character(sessions$sessionno), ".", sessions$Participant)
sessions$participantfe_factor <- factor(sessions$participantfe_string)


#	Creating a variable elected that = 1 for elected leaders who ran unopposed
# Leader of a group whose candidate count for the round was exactly 1
sessions$electedunopposed <- with(
  sessions,
  ifelse(
    (Participant == leader_g1 & candidate_count_g1 == 1) |
      (Participant == leader_g2 & candidate_count_g2 == 1),
    1, 0
  )
)

# sanity check: number of rows flagged as elected unopposed
sum(sessions$electedunopposed == 1, na.rm = TRUE)
#38

# selectedrandomlydsg = 1 for the leader of a group whose candidate count for
# the round was 0 (nobody ran, so the leader was selected randomly)
sessions$selectedrandomlydsg <- with(
  sessions,
  ifelse(
    (Participant == leader_g1 & candidate_count_g1 == 0) |
      (Participant == leader_g2 & candidate_count_g2 == 0),
    1, 0
  )
)

# sanity check: number of rows flagged as randomly selected leader
sum(sessions$selectedrandomlydsg == 1, na.rm = TRUE)
#6

# FINAL SAMPLE PREP

# Drop rows with a missing ticket purchase. Missingness is tested with
# is.na(); comparing against the string "NA" only worked because which()
# silently discards the NA comparison results.
sessions <- sessions[!is.na(sessions$boughtTickets), ]

# Removing strange participants
sessions$sessionparticipant <- paste0(as.character(sessions$sessionno), sessions$Participant)
# Strange: 224P.3 (1000 or 200 only; L) 220P.4 (1000 or 999/998; L) 214P.6 (Almost always 1000; L)
# All 1000: 218P.18 (NL)
# All 160: 214.P20

# Keep a copy of the two suspected-bot participants' rows before removing them
suspected_bot_observations <- sessions[ which(sessions$sessionparticipant == "218P.18" | sessions$sessionparticipant == "214P.20"),]

sessions <- sessions[ which(sessions$sessionparticipant != "218P.18"), ]
sessions <- sessions[ which(sessions$sessionparticipant != "214P.20"), ]



# Removing one aberrant observation where boughtTickets exceeded our programmatically set maximum
sessions <- sessions %>% filter(boughtTickets < 1001)



# Below determines which participants dropped out early (before reaching DSG) and tags them with earlydrop == 1
# 9 participants who dropped out early and didn't make it play in all three stages
# 7 dropped out after playing one round or less
# 2 dropped out during second stage (1 played 17 rounds and the other 15)


# Participant FE levels that still have at least one row. The factor can
# carry levels such as "s.219." that do not correspond to a real
# session-participant; droplevels() removes them while keeping level order.
# (Dropping them with parts[-which(is.na(parts))] would empty `parts`
# whenever no level is unused, because a zero-length negative index selects
# nothing.)
parts <- levels(droplevels(sessions$participantfe_factor))

type<-rep(NA,length(parts))
indeff<-rep(NA,length(parts))
groupeff<-rep(NA,length(parts))
demeff<-rep(NA,length(parts))
hypgroupeff<-rep(NA,length(parts))
hypdemeff<-rep(NA,length(parts))

# Per participant: share of rows with everelecleader == 1 (`type`) and mean
# effort_pctprize in part 1, and in parts 2 and 3 split by isleader == 1
# (groupeff / demeff) versus isleader == 0 (hypgroupeff / hypdemeff).
# seq_along() keeps the loop from running when `parts` is empty.
for (i in seq_along(parts)) {
  subby<-subset(sessions,sessions$participantfe_factor==parts[i])
  type[i]<-(sum(subby$everelecleader)/nrow(subby))
  subby1<-subset(subby,subby$Stepgroup.Label=="LotteryExperiment")
  subby2<-subset(subby,subby$Stepgroup.Label=="GroupExperiment")
  subby3<-subset(subby,subby$Stepgroup.Label=="ElectionsandContest")
  subby2l<-subset(subby2,subby2$isleader==1)
  subby2h<-subset(subby2,subby2$isleader==0)
  subby3l<-subset(subby3,subby3$isleader==1)
  subby3h<-subset(subby3,subby3$isleader==0)
  indeff[i]<-mean(subby1$effort_pctprize,na.rm=TRUE)
  groupeff[i]<-mean(subby2l$effort_pctprize,na.rm=TRUE)
  demeff[i]<-mean(subby3l$effort_pctprize,na.rm=TRUE)
  hypgroupeff[i]<-mean(subby2h$effort_pctprize,na.rm=TRUE)
  hypdemeff[i]<-mean(subby3h$effort_pctprize,na.rm=TRUE)
}


data<-data.frame(parts,type,indeff,groupeff,hypgroupeff,demeff,hypdemeff)

# A participant with no usable part-3 non-leader effort has a missing
# hypdemeff (mean of an empty vector is NaN) and is tagged as an early drop.
# NOTE(review): a participant who was leader in every part-3 round they
# played would be tagged the same way -- confirm none exist.
data$earlydrop<-ifelse(is.na(data$hypdemeff),1,0)

# Copy the participant-level flag onto every row with one vectorised lookup
# instead of a per-row search through `data`.
sessions$earlydrop <- data$earlydrop[
  match(as.character(sessions$participantfe_factor), as.character(data$parts))
]

## 9 participants who dropped out early and didn't make it play in all three stages
## 7 dropped out after playing one round
## 2 dropped out during second stage (1 played 17 rounds and the other 15)
summary(sessions[which(sessions$earlydrop==1),]$participantfe_factor)

# Num dropped out
length(unique(sessions[which(sessions$earlydrop==1),]$sessionparticipant))


# Num total
length(unique(sessions$sessionparticipant))


# Remove the observations from participants who dropped out early
sessions <- sessions %>% filter(earlydrop == 0)
suspected_bot_observations$earlydrop <- 0


# Bucket the prize amount into levels 1-6 using strict inequalities; the same
# rule is applied to the main sample and to the suspected-bot rows.
# NOTE(review): because every bound is strict, a prize exactly equal to 1000,
# 1500, 2000 or 2400 matches no interval and lands in level 6 together with
# prizes of 2500 and above -- confirm no such prize values occur.
sessions$payofflevel <- with(
  sessions,
  ifelse(prize < 1000, 1,
    ifelse(prize > 1000 & prize < 1500, 2,
      ifelse(prize > 1500 & prize < 2000, 3,
        ifelse(prize > 2000 & prize < 2400, 4,
          ifelse(prize > 2400 & prize < 2500, 5, 6)))))
)
suspected_bot_observations$payofflevel <- with(
  suspected_bot_observations,
  ifelse(prize < 1000, 1,
    ifelse(prize > 1000 & prize < 1500, 2,
      ifelse(prize > 1500 & prize < 2000, 3,
        ifelse(prize > 2000 & prize < 2400, 4,
          ifelse(prize > 2400 & prize < 2500, 5, 6)))))
)


# Add in demographics: drop the demographic columns from sessions, then pull
# them back in from sessions_demog. merge() is called without `by`, so it
# joins on whatever column names the two data frames still share, and
# without `all`, so rows lacking a demographics match are not kept.
dems2 <- c("age", "education", "gender", "income", "race")
sessions <- sessions[, -which(colnames(sessions) %in% dems2)]
sessions <- merge(x = sessions, y = sessions_demog)

# female: 1 for gender "f", 0 for "m", NA for anything else
sessions$female <- ifelse(
  sessions$gender == "f", 1,
  ifelse(sessions$gender == "m", 0, NA)
)

# Tickets bought, in hundreds
sessions$boughtTickets_100 <- sessions$boughtTickets / 100

## Add in campaign messages

# The leader columns are removed from sessions here; the messages data get
# their own copy from sessions_leaderids below.
sessions <- sessions[, -which(colnames(sessions) %in% c("leader_g1", "leader_g2"))]

# Coded messages, outer-joined to the per-round leader ids. The outer join
# brings in filler rows from the leaderids data frame, which are removed by
# requiring a non-missing msg_type_primary.
messages <- read.csv("messages_2020_12_23.csv") %>%
  merge(sessions_leaderids, by = c("sessionno", "Stepgroup.Label", "Stepgroup.Loop"), all = TRUE) %>%
  filter(!is.na(msg_type_primary))

# Message author is the leader of either group in that round
messages$isleader <- with(
  messages,
  ifelse(leader_g1 == Participant | leader_g2 == Participant, 1, 0)
)

### 217 is the cheater session; we coded all their messages but they aren't included in any of the analysis
messages <- messages %>% filter(sessionno != 217)

# Merge in prize and gender from the ElectionsandContest rows of sessions
temp <- sessions %>% filter(Participant != "", Stepgroup.Label == "ElectionsandContest")
messages <- merge(
  x = messages,
  y = temp[, c("sessionno", "Stepgroup.Loop", "Participant", "prize", "gender")],
  by = c("sessionno", "Stepgroup.Loop", "Participant"),
  all.x = TRUE, all.y = FALSE
)

# Change all the NAs to zero (applies to every column of messages)
messages[is.na(messages)] <- 0

# Participant FEs: session number pasted directly onto the participant id
messages$partID <- paste0(messages$sessionno, messages$Participant)

# messages is the data frame with just the campaign messages; the message
# text and its coding columns are also merged back into sessions here.


names(messages)
messages_sub<-messages[,c("candidateMsg","sessionno","Stepgroup.Loop","Participant","Stepgroup.Label","msg_type_primary","msg_strategy",
                          "msg_skill", "msg_trackrecord", "msg_pastbad", "msg_inteam", "msg_outteam" ,
                          "msg_bidhigh", "msg_null", "msg_speed", "msg_bidlow", "msg_humor")]

temp <- messages
# Drop sessions' own candidateMsg so the merged-in copy is the only one
sessions<-sessions[,-c(which(colnames(sessions)=="candidateMsg"))]

# Keep every sessions row; rows without a coded message get NA message columns
sessions<-merge(messages_sub,sessions,all.y=TRUE,by=c("sessionno","Stepgroup.Loop","Participant","Stepgroup.Label"))

# Make "null" the reference category of the primary message type
sessions <- within(sessions, msg_type_primary <- relevel(as.factor(msg_type_primary), ref = "null"))

names(sessions)
# Message length in characters
sessions$candidateMsg_char<-nchar(sessions$candidateMsg)

# Message length in words, approximated as (number of whitespace characters) + 1.
# gregexpr() returns a single -1 when there is no match, so taking length()
# of its result counted a message with no whitespace as 2 words. Counting
# only positive match positions gives 1 for such messages and leaves every
# message that contains whitespace unchanged. Missing messages stay NA.
# NOTE(review): values of candidateMsg_word for whitespace-free messages
# change from 2 to 1; re-check any result that uses this variable.
sessions$candidateMsg_word <- vapply(
  gregexpr("[[:space:]]", sessions$candidateMsg),
  function(hits) sum(hits > 0) + 1,
  numeric(1)
)



#### Calculate Non-Monetary Values to Winning
# nmv = 4 * (tickets bought - prize / 4)
sessions$nmv <- 4 * (sessions$boughtTickets - (sessions$prize / 4))




#### Calculate Averages
# Each average is taken per participant (participantfe_factor) over rows with
# isleader == 1 in one part of the experiment, then left-merged back onto
# sessions so every row of that participant carries it.


# 1. ICG (LotteryExperiment) averages
ICG <- sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, ICG_tickets_ave = mean(boughtTickets))

nmv <- sessions %>%
  subset(!is.na(nmv) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, NMV_ave = mean(nmv))

sessions <- merge(sessions, ICG, all.x = TRUE)
sessions <- merge(sessions, nmv, all.x = TRUE)

# Same averages in hundreds of tickets
sessions$ICG_tickets_ave_100 <- sessions$ICG_tickets_ave / 100
sessions$NMV_ave_100 <- sessions$NMV_ave / 100

# 2. RSG (GroupExperiment) averages
RSG <- sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "GroupExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, RSG_tickets_ave = mean(boughtTickets))

nmv <- sessions %>%
  subset(!is.na(nmv) & Stepgroup.Label == "GroupExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, RSG_NMV_ave = mean(nmv))

sessions <- merge(sessions, RSG, all.x = TRUE)
sessions <- merge(sessions, nmv, all.x = TRUE)

sessions$RSG_tickets_ave_100 <- sessions$RSG_tickets_ave / 100
sessions$RSG_NMV_ave_100 <- sessions$RSG_NMV_ave / 100

# 3. ICG averages, no LVRs: same as the ICG averages but restricted to rounds
#    with prize > 1000 (the subsets below already carry that condition).
ICG <- sessions %>%
  subset(prize > 1000 & !is.na(boughtTickets) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, ICG_tickets_ave_noLVR = mean(boughtTickets))

nmv <- sessions %>%
  subset(prize > 1000 & !is.na(nmv) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, NMV_ave_noLVR = mean(nmv))

sessions <- merge(sessions, ICG, all.x = TRUE)
sessions <- merge(sessions, nmv, all.x = TRUE)

sessions$ICG_tickets_ave_noLVR_100 <- sessions$ICG_tickets_ave_noLVR / 100
sessions$NMV_ave_noLVR_100 <- sessions$NMV_ave_noLVR / 100


# 4. NMV averages from LVRs only (prize < 1000)

nmv <- sessions %>%
  subset(prize < 1000 & !is.na(nmv) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, NMV_ave_LVR = mean(nmv))

sessions <- merge(sessions, nmv, all.x = TRUE)
sessions$NMV_ave_LVR_100 <- sessions$NMV_ave_LVR / 100

# 5. ICG winning average: win is 1 for roundResult "win", 0 for "lose",
#    NA otherwise; averaged per participant over LotteryExperiment rows
sessions$win <- ifelse(
  sessions$roundResult == "lose", 0,
  ifelse(sessions$roundResult == "win", 1, NA)
)
win <- sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, ICG_win_ave = mean(win, na.rm = TRUE))
sessions <- merge(sessions, win, all.x = TRUE)

# 6. ICG payoff average
pays <- sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, ICG_payoff_ave = mean(roundPayoff, na.rm = TRUE))
sessions <- merge(sessions, pays, all.x = TRUE)

#### Create dummy variable for candidates who ran unopposed

# Manual coding file, left-merged on whatever columns it shares with
# sessions; rows without a match get ranunopposed = 0.
unoppose <- read.csv("ranunopposed_scmanualcoding.csv")
sessions <- merge(sessions, unoppose, all.x = TRUE)
sessions$ranunopposed <- ifelse(is.na(sessions$ranunopposed), 0, sessions$ranunopposed)


# Per-participant LotteryExperiment win rate and mean payoff, stored under
# the names winave and payoff_ave
sessions$win <- ifelse(
  sessions$roundResult == "lose", 0,
  ifelse(sessions$roundResult == "win", 1, NA)
)
win <- sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, winave = mean(win, na.rm = TRUE))
sessions <- merge(sessions, win, all.x = TRUE)

pays <- sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "LotteryExperiment" & isleader == 1) %>%
  ddply("participantfe_factor", summarise, payoff_ave = mean(roundPayoff, na.rm = TRUE))
sessions <- merge(sessions, pays, all.x = TRUE)

# Lagged variables
# stage orders a participant's rounds across parts: the loop number, plus 12
# in GroupExperiment and plus 24 in any other non-Lottery part.
sessions$stage <- with(
  sessions,
  ifelse(
    Stepgroup.Label == "LotteryExperiment", as.numeric(Stepgroup.Loop),
    ifelse(
      Stepgroup.Label == "GroupExperiment",
      12 + as.numeric(Stepgroup.Loop),
      24 + as.numeric(Stepgroup.Loop)
    )
  )
)

# Previous-round payoff, leader status and losing-candidate status within
# participant, ordered by stage
sessions <- data.frame(
  sessions %>%
    group_by(participantfe_factor) %>%
    dplyr::mutate(
      payoff_lag = dplyr::lag(roundPayoff, order_by = stage),
      leader_lag = dplyr::lag(isleader, order_by = stage),
      loser_lag = dplyr::lag(islosingcandidate, order_by = stage)
    )
)
# The lags are blanked in loop 1 of every part, so no lag crosses a part boundary
sessions$payoff_lag <- ifelse(sessions$Stepgroup.Loop == 1, NA, sessions$payoff_lag)
sessions$leader_lag <- ifelse(sessions$Stepgroup.Loop == 1, NA, sessions$leader_lag)
sessions$loser_lag <- ifelse(sessions$Stepgroup.Loop == 1, NA, sessions$loser_lag)

# Winning candidate: ran in the round and is its leader; lagged the same way
sessions$iswinningcandidate <- ifelse(sessions$iscandidate == 1 & sessions$isleader == 1, 1, 0)
sessions <- data.frame(
  sessions %>%
    group_by(participantfe_factor) %>%
    dplyr::mutate(winner_lag = dplyr::lag(iswinningcandidate, order_by = stage))
)
sessions$winner_lag <- ifelse(sessions$Stepgroup.Loop == 1, NA, sessions$winner_lag)


##########################################################################################
##########################################################################################
############################                                  ############################
############################ DATA PREP DONE - ANALYSIS STARTS ############################
############################                                  ############################
##########################################################################################
##########################################################################################



############################################################
########			Analysis in the Main Manuscript		########
############################################################

### Aggregate differences between M/F
# One row per participant (first row for each participantfe_string), split by
# the female indicator; the summaries below are printed.
test <- distinct(sessions, participantfe_string, .keep_all = TRUE)
testf <- subset(test, female == 1)
testm <- subset(test, female == 0)
summary(testf$NMV_ave)
summary(testm$NMV_ave)
summary(testf$ICG_tickets_ave)
summary(testm$ICG_tickets_ave)

###
# Figure 1, Ticket purchases by ICG/DSG, by gender
###

# Left pane: density of tickets bought in the ICG (LotteryExperiment), by gender
icg_leader_obs <- subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment")

# Group means (drawn as dashed vertical lines) and variances
leadertixbygenderyeslvricg <- ddply(icg_leader_obs, c("gender"), summarise, ticket_ave = mean(boughtTickets))
# The variances are very large because it's large numbers squared
icgvarbygender <- ddply(icg_leader_obs, c("gender"), summarise, ticket_var = var(boughtTickets))

# The annotation text below is hard-coded, not computed from the data
fig1left <- ggplot(icg_leader_obs, aes(x = boughtTickets, color = gender)) +
  geom_density() +
  geom_rug(
    data = icg_leader_obs,
    aes(x = boughtTickets, color = gender, y = 0),
    position = position_jitter(height = 0)
  ) +
  geom_vline(
    data = leadertixbygenderyeslvricg,
    aes(xintercept = ticket_ave, color = gender),
    linetype = "dashed"
  ) +
  labs(color = "") +
  scale_color_manual(labels = c("women", "men"), values = c("black", "grey55")) +
  xlab("Tickets Bought in ICG") +
  ylab("Density") +
  theme(axis.title = element_text(size = 14), legend.text = element_text(size = 14)) +
  annotate("text", x = c(250, 250), y = c(0.0002, 0.0001), label = c("Difference =", "-29 tickets"), size = 5) +
  annotate("text", x = c(250, 250), y = c(0.0005, 0.0004), label = c("Mean for men = 561.5", "Mean for women = 532.6"), size = 5)

fig1left
#		ggsave("~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/LeaderGender_yeslvr_icg.pdf")
### Note: sometimes, this will return an error reading "object of class NULL".  If you re-run the code a second time, the plot will display.  I'm not sure what generates this error.  https://github.com/tidyverse/ggplot2/issues/2514
###		So I used the trick where you assign the plot to an object name first

# Right pane: tickets bought by leaders in ElectionsandContest, by gender, LVRs included
dsg_leader_obs <- subset(sessions, isleader == 1 & Stepgroup.Label == "ElectionsandContest")

leadertixbygenderyeslvr <- ddply(dsg_leader_obs, c("gender"), summarise, ticket_ave = mean(boughtTickets))

fig1right <- ggplot(dsg_leader_obs, aes(x = boughtTickets, color = gender)) +
  geom_density() +
  geom_rug(
    data = dsg_leader_obs,
    aes(x = boughtTickets, color = gender, y = 0),
    position = position_jitter(height = 0)
  ) +
  geom_vline(
    data = leadertixbygenderyeslvr,
    aes(xintercept = ticket_ave, color = gender),
    linetype = "dashed"
  ) +
  labs(color = "") +
  scale_color_manual(labels = c("women", "men"), values = c("black", "grey55")) +
  xlab("Tickets Bought as Elected Leader") +
  ylab("Density") +
  theme(axis.title = element_text(size = 14), legend.text = element_text(size = 14)) +
  annotate("text", x = c(468, 468), y = c(0.0011, 0.0010), label = c("Difference =", "82 tickets"), size = 5) +
  annotate("text", x = c(310, 310), y = c(0.0015, 0.0014), label = c("Mean for men = 622.8", "Mean for women = 704.3"), size = 5)
# Save the right pane; it is the most recently built plot, named explicitly here
ggsave("~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/LeaderGender_yeslvr.pdf", plot = fig1right)

fig1right

# Regressions for the p value associated with the difference in Figure 1,
# right pane: tickets bought by leaders in ElectionsandContest on the female
# indicator, first restricted to prize > 1000, then on all prizes.
dsg.mf.nolvr <- lm(
  boughtTickets ~ female,
  data = subset(sessions, !is.na(boughtTickets) & isleader == 1 & Stepgroup.Label == "ElectionsandContest" & prize > 1000)
)
summary(dsg.mf.nolvr)

dsg.mf.yeslvr <- lm(
  boughtTickets ~ female,
  data = subset(sessions, !is.na(boughtTickets) & isleader == 1 & Stepgroup.Label == "ElectionsandContest")
)
summary(dsg.mf.yeslvr)
# Despite the differences in the ICG...
sessions %>%
  subset(Stepgroup.Label == "LotteryExperiment") %>%
  ddply(c("gender"), summarise, ICG_tickets_ave = mean(ICG_tickets_ave))


###
# Table 1: Differences in NMVWs, by gender
###
# Every cell is the mean of a participant-level ICG average taken over
# ElectionsandContest rows with a recorded ticket purchase; results are printed.

# Leaders (candidates who are the leader) vs not leaders
sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & isleader == 1) %>%
  ddply(c("gender"), summarise, ICG_tickets_ave = mean(ICG_tickets_ave))
sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & isleader == 0) %>%
  ddply(c("gender"), summarise, ICG_tickets_ave = mean(ICG_tickets_ave))
sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & isleader == 1) %>%
  ddply(c("gender"), summarise, NMV_ave = mean(NMV_ave))
sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & isleader == 0) %>%
  ddply(c("gender"), summarise, NMV_ave = mean(NMV_ave))

# Candidates vs Not Candidates
sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest") %>%
  ddply(c("iscandidate", "gender"), summarise, ICG_tickets_ave = mean(ICG_tickets_ave))
sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest") %>%
  ddply(c("iscandidate", "gender"), summarise, NMV_ave = mean(NMV_ave))

# Winners vs Losers, among candidates with ranunopposed == 0
sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0) %>%
  ddply(c("iswinningcandidate", "gender"), summarise, ICG_tickets_ave = mean(ICG_tickets_ave))
sessions %>%
  subset(!is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0) %>%
  ddply(c("iswinningcandidate", "gender"), summarise, NMV_ave = mean(NMV_ave))


# Same thing, excluding LVRs (every subset additionally requires prize > 1000)
#  Leaders vs not leaders
ddply(subset(sessions,   !(is.na(boughtTickets)) & Stepgroup.Label=="ElectionsandContest"  & iscandidate==1 & isleader==1 & prize > 1000), c("gender"), summarise, ICG_tickets_ave=mean(ICG_tickets_ave))
ddply(subset(sessions,   !(is.na(boughtTickets)) & Stepgroup.Label=="ElectionsandContest"  &  isleader==0 & prize > 1000), c("gender"), summarise, ICG_tickets_ave=mean(ICG_tickets_ave))
ddply(subset(sessions,   !(is.na(boughtTickets)) & Stepgroup.Label=="ElectionsandContest"  & iscandidate==1& isleader==1 & prize > 1000), c("gender"), summarise, NMV_ave=mean(NMV_ave))
ddply(subset(sessions,   !(is.na(boughtTickets)) & Stepgroup.Label=="ElectionsandContest"  &  isleader==0 & prize > 1000), c("gender"), summarise, NMV_ave=mean(NMV_ave))

# Candidates vs Not Candidates
ddply(subset(sessions,   !(is.na(boughtTickets)) & Stepgroup.Label=="ElectionsandContest"  & prize > 1000), c("iscandidate","gender"), summarise, ICG_tickets_ave=mean(ICG_tickets_ave))
ddply(subset(sessions,  !(is.na(boughtTickets)) & Stepgroup.Label=="ElectionsandContest"  & prize > 1000), c("iscandidate","gender"), summarise, NMV_ave=mean(NMV_ave))

# Winners vs Losers
# The LVR-inclusive version of this comparison restricts to ranunopposed == 0;
# the same restriction is applied here so the two versions differ only in the
# prize filter, as the "same thing" heading states.
# NOTE(review): this changes the printed no-LVR winner/loser means whenever an
# unopposed candidate appears in a prize > 1000 round -- re-check any reported
# figure taken from these two lines.
ddply(subset(sessions,   !(is.na(boughtTickets)) & Stepgroup.Label=="ElectionsandContest" & iscandidate==1 & ranunopposed==0 & prize > 1000), c("iswinningcandidate","gender"), summarise, ICG_tickets_ave=mean(ICG_tickets_ave))
ddply(subset(sessions,  !(is.na(boughtTickets)) & Stepgroup.Label=="ElectionsandContest" & iscandidate==1 & ranunopposed==0 & prize > 1000), c("iswinningcandidate","gender"), summarise, NMV_ave=mean(NMV_ave))




###
# Table 2 and Fig 2: Regression of iscandidate and isleader on NMVW X Female + substantive effects plot
###
# Logit models on ElectionsandContest rows with a recorded ticket purchase.
# Each outcome is fitted twice: once on the ICG ticket average and once on
# the NMV average, each interacted with the female indicator.

iscand_mf_icgave_log <- glm(
  iscandidate ~ female * ICG_tickets_ave_100,
  data = subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"),
  family = binomial
)
iscand_mf_nash_log <- glm(
  iscandidate ~ female * NMV_ave_100,
  data = subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"),
  family = binomial
)

isleader_mf_icgave_log <- glm(
  isleader ~ female * ICG_tickets_ave_100,
  data = subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"),
  family = binomial
)
isleader_mf_nash_log <- glm(
  isleader ~ female * NMV_ave_100,
  data = subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"),
  family = binomial
)

# Winner models: candidates only, with ranunopposed == 0
iswinner_mf_icgave_log <- glm(
  iswinningcandidate ~ female * ICG_tickets_ave_100,
  data = subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0),
  family = binomial
)
iswinner_mf_nash_log <- glm(
  iswinningcandidate ~ female * NMV_ave_100,
  data = subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0),
  family = binomial
)


stargazer(
  isleader_mf_nash_log, isleader_mf_icgave_log,
  iscand_mf_nash_log, iscand_mf_icgave_log,
  iswinner_mf_nash_log, iswinner_mf_icgave_log,
  type = "latex",
  title = "Effect of NMVW on Leadership and Candidacy, by Gender",
  label = "tab:leadcandlogit",
  covariate.labels = c("Female", "Nash NMVW", "Female X Nash NMVW", "Ave. ICG Tickets", "Female X Ave ICG Tick.", "Constant"),
  dep.var.labels = c("Is Leader", "Is Candidate", "Is Winner"),
  keep.stat = c("n")
)


# Predicted probabilities from the NMV models, by gender.
# Untrimmed alternative for the leader plot: limits = c(0, 0.75), saved as
#   ggsave("~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/lead.pdf")
leadplot <- interact_plot(
  isleader_mf_nash_log,
  pred = NMV_ave_100, modx = female, interval = TRUE,
  x.label = "ICG Average Nash NMVW", y.label = "Pr. of Becoming Leader",
  modx.labels = c("men", "women"),
  legend.main = "Leadership", rug = TRUE
) + scale_y_continuous(limits = c(0, 0.45))
#ggsave("~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/leadtrimmed.pdf")

# Untrimmed alternative for the candidacy plot: limits = c(0, 1), saved as
#   ggsave("~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/cand.pdf")
candplot <- interact_plot(
  iscand_mf_nash_log,
  pred = NMV_ave_100, modx = female, interval = TRUE,
  x.label = "ICG Average Nash NMVW", y.label = "Pr. of Candidacy",
  modx.labels = c("men", "women"),
  legend.main = "Candidacy", rug = TRUE
) + scale_y_continuous(limits = c(0, 0.78))
#ggsave("~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/candtrimmed.pdf")

winplot <- interact_plot(
  iswinner_mf_nash_log,
  pred = NMV_ave_100, modx = female, interval = TRUE,
  x.label = "ICG Average Nash NMVW", y.label = "Pr. of Winning",
  modx.labels = c("men", "women"),
  legend.main = "Winning Candidacy", rug = TRUE
) + scale_y_continuous(limits = c(0, 1))
#ggsave("~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/win.pdf")


###
# Table 3: Length of campaign messages, by effort and gender
###

# OLS of campaign-message length (candidateMsg_char) on gender interacted with
# each measure in turn, using candidate rows from ElectionsandContest.
camplength1 <- lm(
  candidateMsg_char ~ female * ICG_tickets_ave_100,
  data = subset(
    sessions,
    iscandidate == 1 & Stepgroup.Label == "ElectionsandContest"
  )
)
camplength2 <- lm(
  candidateMsg_char ~ female * NMV_ave_100,
  data = subset(
    sessions,
    iscandidate == 1 & Stepgroup.Label == "ElectionsandContest"
  )
)

# NOTE(review): the dependent-variable label says "Words" while the response
# is named candidateMsg_char -- confirm which unit is intended.
stargazer(
  camplength1, camplength2,
  type = "latex",
  title = "Length of Campaign Messages, by Effort and Gender",
  label = "tab:camplength_regression",
  covariate.labels = c(
    "Female",
    "Ave. ICG Tickets",
    "Female x Ave. ICG Tickets ",
    "Ave. ICG NMV",
    "Female x Ave. ICG NMV"
  ),
  dep.var.labels = "Words in Campaign Message",
  notes = "Averages given in 100s of tickets."
)


###
# Table 4: Differences in NMVW, using RSG data
###

# Group means of the RSG measures, printed to the console. Every summary uses
# ElectionsandContest rows with a non-missing boughtTickets.

#  Leaders vs not leaders: average RSG tickets, by gender
ddply(
  subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & isleader == 1
  ),
  "gender", summarise,
  RSG_tickets_ave = mean(RSG_tickets_ave)
)
ddply(
  subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      isleader == 0
  ),
  "gender", summarise,
  RSG_tickets_ave = mean(RSG_tickets_ave)
)

#  Leaders vs not leaders: average RSG NMV, by gender
ddply(
  subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & isleader == 1
  ),
  "gender", summarise,
  RSG_NMV_ave = mean(RSG_NMV_ave)
)
ddply(
  subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      isleader == 0
  ),
  "gender", summarise,
  RSG_NMV_ave = mean(RSG_NMV_ave)
)

# Candidates vs Not Candidates
ddply(
  subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  c("iscandidate", "gender"), summarise,
  RSG_tickets_ave = mean(RSG_tickets_ave)
)
ddply(
  subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  c("iscandidate", "gender"), summarise,
  RSG_NMV_ave = mean(RSG_NMV_ave)
)

# Winners vs Losers (candidates in contested elections, split by isleader)
ddply(
  subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  ),
  c("isleader", "gender"), summarise,
  RSG_tickets_ave = mean(RSG_tickets_ave)
)
ddply(
  subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  ),
  c("isleader", "gender"), summarise,
  RSG_NMV_ave = mean(RSG_NMV_ave)
)




#############################################################################################################################################################################################################################
# Appendix Analyses
#############################################################################################################################################################################################################################

###
# A.4 Demographics figures are in a separate file, since they call CCES data
###




####################################################################
########	App C: Robustness of M/F Differences in DSG		########
####################################################################

#### This section is all the robustness checks for the difference in DSG ticket purchases for M/F, as shown in Figure 1.

#### Robustness Regressions

# All models in this block regress boughtTickets on female * ispart3 for
# leader rows outside the GroupExperiment part; they differ only in which
# rounds or participants are excluded.

# Rounds with prize > 1000 only (the "No LVR" column of Table C.1).
dsg.mf.nolvr <- lm(
  boughtTickets ~ female * ispart3,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      Stepgroup.Label != "GroupExperiment" & prize > 1000
  )
)
summary(dsg.mf.nolvr)

###
# Table C.1 Effect of gender on tickets bought, Without Prize FE
###

# Including LVRs (no prize restriction)
dsg.mf <- lm(
  boughtTickets ~ female * ispart3,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      sessions$Stepgroup.Label != "GroupExperiment"
  )
)
summary(dsg.mf)

# Excluding the outlier woman who was leader a lot
dsg.mf.nofout <- lm(
  boughtTickets ~ female * ispart3,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      sessions$Stepgroup.Label != "GroupExperiment" &
      participantfe_string != "s.225.P.8"
  )
)
summary(dsg.mf.nofout)

# Excluding the outlier men who were leaders a lot
dsg.mf.nomout <- lm(
  boughtTickets ~ female * ispart3,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      sessions$Stepgroup.Label != "GroupExperiment" &
      participantfe_string != "s.223.P.15" &
      participantfe_string != "s.219.P.20" &
      participantfe_string != "s.209.P.8" &
      participantfe_string != "s.207.P.13"
  )
)
summary(dsg.mf.nomout)

# Excluding both the outlier men and the outlier woman.
# (This model was previously fitted and summarised twice, the first time under
# a comment that mentioned only the men; the redundant copy has been removed.)
dsg.mf.nomfout <- lm(
  boughtTickets ~ female * ispart3,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      sessions$Stepgroup.Label != "GroupExperiment" &
      participantfe_string != "s.223.P.15" &
      participantfe_string != "s.219.P.20" &
      participantfe_string != "s.209.P.8" &
      participantfe_string != "s.207.P.13" &
      participantfe_string != "s.225.P.8"
  )
)
summary(dsg.mf.nomfout)

# Column order must match column.labels: Incl. LVR first, then No LVR.
#stargazer(dsg.mf.nolvr, dsg.mf, dsg.mf.nofout, dsg.mf.nomout, dsg.mf.nomfout,type = "latex",
stargazer(
  dsg.mf, dsg.mf.nolvr, dsg.mf.nofout, dsg.mf.nomout, dsg.mf.nomfout,
  type = "latex",
  title = "Effect of Gender on Tickets Bought",
  label = "tab:mfdsgrobust",
  covariate.labels = c("Female", "DSG", "Female X DSG", "Constant"),
  column.labels = c(
    "Incl. LVR", "No LVR", "Excl. Fem. Outl.",
    "Excl. Male Outl.", "Excl. M/F Outl."
  ),
  keep.stat = c("n"),
  dep.var.labels.include = FALSE
)


###
# Table C.2 Effect of gender on tickets bought, With Prize FE
###
 
# Same five specifications as the table without prize fixed effects, each with
# factor(payofflevel) added to the right-hand side.

# Like main figure (prize > 1000 only), with PFE
dsg.mf.nolvr.pfe <- lm(
  boughtTickets ~ female * ispart3 + factor(payofflevel),
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      Stepgroup.Label != "GroupExperiment" & prize > 1000
  )
)
summary(dsg.mf.nolvr.pfe)

# Including LVRs
dsg.mf.pfe <- lm(
  boughtTickets ~ female * ispart3 + factor(payofflevel),
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      sessions$Stepgroup.Label != "GroupExperiment"
  )
)
summary(dsg.mf.pfe)

# Excluding the outlier woman who was leader a lot
dsg.mf.nofout.pfe <- lm(
  boughtTickets ~ female * ispart3 + factor(payofflevel),
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      sessions$Stepgroup.Label != "GroupExperiment" &
      participantfe_string != "s.225.P.8"
  )
)
summary(dsg.mf.nofout.pfe)

# Excluding the outlier men who were leaders a lot
dsg.mf.nomout.pfe <- lm(
  boughtTickets ~ female * ispart3 + factor(payofflevel),
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      sessions$Stepgroup.Label != "GroupExperiment" &
      participantfe_string != "s.223.P.15" &
      participantfe_string != "s.219.P.20" &
      participantfe_string != "s.209.P.8" &
      participantfe_string != "s.207.P.13"
  )
)
summary(dsg.mf.nomout.pfe)

# Excluding outlier men and women
dsg.mf.nomfout.pfe <- lm(
  boughtTickets ~ female * ispart3 + factor(payofflevel),
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & isleader == 1 &
      sessions$Stepgroup.Label != "GroupExperiment" &
      participantfe_string != "s.223.P.15" &
      participantfe_string != "s.219.P.20" &
      participantfe_string != "s.209.P.8" &
      participantfe_string != "s.207.P.13" &
      participantfe_string != "s.225.P.8"
  )
)
summary(dsg.mf.nomfout.pfe)

# `keep` restricts the printed rows to the gender/part terms and the constant,
# so the payofflevel dummies are fitted but not displayed.
stargazer(
  dsg.mf.nolvr.pfe, dsg.mf.pfe, dsg.mf.nofout.pfe,
  dsg.mf.nomout.pfe, dsg.mf.nomfout.pfe,
  type = "latex",
  title = "Effect of Gender on Tickets Bought, with Prize Fixed Effects",
  label = "tab:mfdsgrobustpfe",
  covariate.labels = c("Female", "DSG", "Female X DSG", "Constant"),
  column.labels = c(
    "No LVR", "Incl. LVR", "Excl. Fem. Outl.",
    "Excl. Male Outl.", "Excl. M/F Outl."
  ),
  keep.stat = c("n"),
  dep.var.labels.include = FALSE,
  keep = c("female", "ispart3", "female:ispart3", "Constant")
)



################################################################################################
########	App D: Robustness of Effect of NMVW on Lead/Candidate by Gender	        	########
################################################################################################


###
# Table D.1: Effect of NMVW on leadership, candidacy, by gender (outliers robustness)
###

# Four robustness variants for each outcome (iscandidate, isleader), all on
# ElectionsandContest rows with a recorded ticket purchase:
#   *_ols     linear model on NMV_ave_100
#   *_nolvr   logit on NMV_ave_noLVR_100
#   *_nofout  logit on NMV_ave_100, dropping participant s.225.P.8
#   *_nofmout logit on NMV_ave_100, dropping s.225.P.8 and four further
#             participants

# Candidacy
iscand_mf_nash_ols <- lm(
  iscandidate ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  )
)
iscand_mf_nash_nolvr <- glm(
  iscandidate ~ female * NMV_ave_noLVR_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  family = "binomial"
)
iscand_mf_nash_log_nofout <- glm(
  iscandidate ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      participantfe_string != "s.225.P.8"
  ),
  family = binomial
)
iscand_mf_nash_log_nofmout <- glm(
  iscandidate ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      participantfe_string != "s.223.P.15" &
      participantfe_string != "s.219.P.20" &
      participantfe_string != "s.209.P.8" &
      participantfe_string != "s.207.P.13" &
      participantfe_string != "s.225.P.8"
  ),
  family = binomial
)

# Leadership
isleader_mf_nash_ols <- lm(
  isleader ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  )
)
isleader_mf_nash_nolvr <- glm(
  isleader ~ female * NMV_ave_noLVR_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  family = "binomial"
)
isleader_mf_nash_log_nofout <- glm(
  isleader ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      participantfe_string != "s.225.P.8"
  ),
  family = binomial
)
isleader_mf_nash_log_nofmout <- glm(
  isleader ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      participantfe_string != "s.223.P.15" &
      participantfe_string != "s.219.P.20" &
      participantfe_string != "s.209.P.8" &
      participantfe_string != "s.207.P.13" &
      participantfe_string != "s.225.P.8"
  ),
  family = binomial
)

# Candidacy columns first, then leadership columns, in the same variant order.
stargazer(
  iscand_mf_nash_ols, iscand_mf_nash_nolvr,
  iscand_mf_nash_log_nofout, iscand_mf_nash_log_nofmout,
  isleader_mf_nash_ols, isleader_mf_nash_nolvr,
  isleader_mf_nash_log_nofout, isleader_mf_nash_log_nofmout,
  type = "latex",
  title = "Effect of NMVW on Leadership and Candidacy, by Gender",
  label = "tab:leadcandnomfout",
  covariate.labels = c(
    "Female",
    "Nash NMVW",
    "Female X Nash NMVW",
    "Nash NMVW (no LVR)",
    "Fem. X NMVW (no LVR)",
    "Constant"
  ),
  column.labels = c(
    "OLS", "No LVR", "Excl. Fem. Outl.", "Excl. M/F Outl.",
    "OLS", "No LVR", "Excl. Fem. Outl.", "Excl. M/F Outl."
  ),
  dep.var.labels = c("Is Candidate", "Is Leader"),
  keep.stat = c("n")
)

###
# Analysis in D.2: Effect of NMVW on being the winning candidate
###

# Effect of NMVW on being a winning candidate.
# Every model here is a linear model of isleader on gender interacted with one
# measure, fitted on candidates in contested elections (iscandidate == 1 and
# ranunopposed == 0) in ElectionsandContest rows with a recorded purchase.
# NOTE(review): the table titles below say "Logit" but the models are fitted
# with lm(); confirm whether the titles or the estimator should change.
iswincand_mf_icgave <- lm(
  isleader ~ female * ICG_tickets_ave_100,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  )
)
iswincand_mf_nash <- lm(
  isleader ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  )
)

# The Nash model is listed first so that the coefficient rows come out in the
# order assumed by covariate.labels (Nash rows, then ICG-ticket rows), matching
# the other tables in this section. With the ICG model first, the Nash labels
# were attached to the ICG-ticket coefficients.
stargazer(
  iswincand_mf_nash, iswincand_mf_icgave,
  type = "latex",
  title = "Effect of NMVW on Winning, Conditional on Candidacy, by Gender, Logit",
  label = "tab:wincand",
  covariate.labels = c(
    "Female", "Nash NMVW", "Female X Nash NMVW",
    "Ave. ICG Tickets", "Female X Ave ICG Tick.", "Constant"
  ),
  keep.stat = c("n")
)

# Using NMVW measures that exclude LVR
iswincand_mf_nash_nolvr <- lm(
  isleader ~ female * NMV_ave_noLVR_100,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  )
)
iswincand_mf_icgave_nolvr <- lm(
  isleader ~ female * ICG_tickets_ave_noLVR_100,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  )
)

# Excluding F outliers
iswincand_mf_icgave_nofout <- lm(
  isleader ~ female * ICG_tickets_ave_100,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0 &
      participantfe_string != "s.225.P.8"
  )
)
iswincand_mf_nash_nofout <- lm(
  isleader ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0 &
      participantfe_string != "s.225.P.8"
  )
)

# Excluding M/F outliers
iswincand_mf_icgave_nomfout <- lm(
  isleader ~ female * ICG_tickets_ave_100,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0 &
      participantfe_string != "s.223.P.15" &
      participantfe_string != "s.219.P.20" &
      participantfe_string != "s.209.P.8" &
      participantfe_string != "s.207.P.13" &
      participantfe_string != "s.225.P.8"
  )
)
iswincand_mf_nash_nomfout <- lm(
  isleader ~ female * NMV_ave_100,
  data = subset(
    sessions,
    !(is.na(boughtTickets)) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0 &
      participantfe_string != "s.223.P.15" &
      participantfe_string != "s.219.P.20" &
      participantfe_string != "s.209.P.8" &
      participantfe_string != "s.207.P.13" &
      participantfe_string != "s.225.P.8"
  )
)

stargazer(
  iswincand_mf_nash_nolvr, iswincand_mf_icgave_nolvr,
  type = "latex",
  title = "Effect of NMVW on Winning, Conditional on Candidacy, by Gender, Logit, No LVR",
  label = "tab:wincandnolvr",
  covariate.labels = c(
    "Female", "Nash NMVW", "Female X Nash NMVW",
    "Ave. ICG Tickets", "Female X Ave ICG Tick.", "Constant"
  ),
  keep.stat = c("n")
)

# Female-outlier table: uses the *_nofout models. It previously printed the
# *_nomfout models, duplicating the M/F table below and leaving the *_nofout
# fits unused.
stargazer(
  iswincand_mf_nash_nofout, iswincand_mf_icgave_nofout,
  type = "latex",
  title = "Effect of NMVW on Winning, Conditional on Candidacy, by Gender, Logit, Excl. Female Outlier",
  label = "tab:wincandnofout",
  covariate.labels = c(
    "Female", "Nash NMVW", "Female X Nash NMVW",
    "Ave. ICG Tickets", "Female X Ave ICG Tick.", "Constant"
  ),
  keep.stat = c("n")
)

stargazer(
  iswincand_mf_nash_nomfout, iswincand_mf_icgave_nomfout,
  type = "latex",
  title = "Effect of NMVW on Winning, Conditional on Candidacy, by Gender, Logit, No M/F Outliers",
  label = "tab:wincandnoout",
  covariate.labels = c(
    "Female", "Nash NMVW", "Female X Nash NMVW",
    "Ave. ICG Tickets", "Female X Ave ICG Tick.", "Constant"
  ),
  keep.stat = c("n")
)






################################################################################################
########	App E: Campaign Messages										        	########
################################################################################################

###
# Table E.1
###

# This gives the win percentages shown in the first line of Table E.1.
# One row per primary message type, computed over candidates in contested
# elections: number of candidacies, number of distinct participants, mean
# NMV_ave, and the percentage who won.
camp <- ddply(
  subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  "msg_type_primary", summarise,
  Total = sum(iscandidate),
  Individual = length(unique(na.omit(as.character(participantfe_factor)))),
  NMVW = mean(NMV_ave),
  Win = 100 * mean(iswinningcandidate)
)

# This gives the numbers for Primary Category, Individuals, and Average NMVW
# for the bottom part of the table. The same summary is computed separately
# for women and men, over all of their rows in `sessions`.
s <- sessions
s_f <- subset(s, female == 1)
s_m <- subset(s, female == 0)

camp_f <- ddply(
  s_f, "msg_type_primary", summarise,
  Total = sum(iscandidate),
  Individual = length(unique(na.omit(as.character(participantfe_factor)))),
  NMVW = mean(NMV_ave),
  Win = 100 * mean(iswinningcandidate)
)
camp_m <- ddply(
  s_m, "msg_type_primary", summarise,
  Total = sum(iscandidate),
  Individual = length(unique(na.omit(as.character(participantfe_factor)))),
  NMVW = mean(NMV_ave),
  Win = 100 * mean(iswinningcandidate)
)

# Win percentage by message type over all rows, sorted ascending.
camp_effect <- ddply(
  s, "msg_type_primary", summarise,
  Win = 100 * mean(iswinningcandidate)
)
camp_effect <- camp_effect[order(camp_effect$Win), ]

# Candidacies of each type as a percentage of the number of rows for that
# gender.
camp_f$TotalPerc <- camp_f$Total / nrow(s_f) * 100
camp_m$TotalPerc <- camp_m$Total / nrow(s_m) * 100

# The percentages (the share of all messages that were of a particular type) were calculated from the other numbers in the table.
#	Eg, 17/(17+24+10+46+44+22+24+9+24) = 0.07727273 ---> 8% of messages from women were of the primary type "Null"

###
# Table E.2: Effect of gender and NMVW on winning, controlling for campaign message type
###

# Logits of isleader on NMV_ave_100 * female for candidates in contested
# elections, adding message length (m1), primary message type (m2), or
# both (m3) as controls.
m1 <- glm(
  isleader ~ NMV_ave_100 * female + candidateMsg_char,
  data = subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  family = "binomial"
)
m2 <- glm(
  isleader ~ NMV_ave_100 * female + msg_type_primary,
  data = subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  family = "binomial"
)
m3 <- glm(
  isleader ~ NMV_ave_100 * female + candidateMsg_char + msg_type_primary,
  data = subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  family = "binomial"
)

# NOTE(review): all three models share the response isleader, so the two
# dep.var.labels may not map onto columns the way their wording suggests --
# check the rendered header.
stargazer(
  m1, m2, m3,
  type = "latex",
  title = "Effect of Gender and NMVW on Winning, Controlling for Campaign Messages",
  label = "tab:camp_regression",
  covariate.labels = c(
    "NMVW", "Female", "Length",
    "Bid High", "Bid Low", "Humor", "Critique", "Skill",
    "Speed", "Strategy", "Team", "Track Record",
    "NMVW x Female"
  ),
  dep.var.labels = c("Single category", "Multiple categories")
)


###
# Table E.3: Same as E.2 but with alternate message content coding
###

# Combined team indicator: 1 when the message is flagged as in-team or
# out-team, 0 otherwise.
sessions <- sessions %>%
  dplyr::mutate(msg_team = ifelse(msg_inteam == 1 | msg_outteam == 1, 1, 0))

# Logits of isleader on NMV_ave_100 * female for candidates in contested
# elections, controlling for the individual message-content indicators
# (m2b) and additionally for message length (m3b).
m2b <- glm(
  isleader ~ NMV_ave_100 * female +
    msg_bidhigh + msg_bidlow + msg_humor + msg_pastbad + msg_skill +
    msg_speed + msg_strategy + msg_team + msg_trackrecord,
  data = subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  family = "binomial"
)
m3b <- glm(
  isleader ~ NMV_ave_100 * female + candidateMsg_char +
    msg_bidhigh + msg_bidlow + msg_humor + msg_pastbad + msg_skill +
    msg_speed + msg_strategy + msg_team + msg_trackrecord,
  data = subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  family = "binomial"
)

# "The results also obtain if we use a coding of message types based on whether the message contained any amount of a particular type, as opposed to being the message's primary type."
# The LaTeX label is distinct from the primary-type table's
# "tab:camp_regression"; sharing one label makes LaTeX report a multiply
# defined label and resolve references ambiguously.
# NOTE(review): update any \ref in the manuscript that pointed at this table.
stargazer(
  m2b, m3b,
  type = "latex",
  title = "Effect of Gender and NMVW on Winning, Controlling for Campaign Messages (Any content coding)",
  label = "tab:camp_regression_any",
  covariate.labels = c(
    "NMVW", "Female", "Message Length",
    "Bid High", "Bid Low", "Humor", "Critique", "Skill",
    "Speed", "Strategy", "Team", "Track Record",
    "NMVW x Female"
  )
)










########################################
########	F1. RSG Analysis	########
########################################

# This was in the RR note, but not the manuscript.  Included here just in case.
### Replicate Table 1 with RSG data
# Each call prints group means of an RSG measure for ElectionsandContest rows
# that have a recorded ticket purchase.

#  Leaders vs not leaders
ddply(
  .data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & isleader == 1
  ),
  .variables = "gender",
  .fun = summarise,
  RSG_tickets_ave = mean(RSG_tickets_ave)
)
ddply(
  .data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      isleader == 0
  ),
  .variables = "gender",
  .fun = summarise,
  RSG_tickets_ave = mean(RSG_tickets_ave)
)
ddply(
  .data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & isleader == 1
  ),
  .variables = "gender",
  .fun = summarise,
  RSG_NMV_ave = mean(RSG_NMV_ave)
)
ddply(
  .data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      isleader == 0
  ),
  .variables = "gender",
  .fun = summarise,
  RSG_NMV_ave = mean(RSG_NMV_ave)
)

# Candidates vs Not Candidates
ddply(
  .data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  .variables = c("iscandidate", "gender"),
  .fun = summarise,
  RSG_tickets_ave = mean(RSG_tickets_ave)
)
ddply(
  .data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  .variables = c("iscandidate", "gender"),
  .fun = summarise,
  RSG_NMV_ave = mean(RSG_NMV_ave)
)

# Winners vs Losers
ddply(
  .data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  ),
  .variables = c("isleader", "gender"),
  .fun = summarise,
  RSG_tickets_ave = mean(RSG_tickets_ave)
)
ddply(
  .data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  ),
  .variables = c("isleader", "gender"),
  .fun = summarise,
  RSG_NMV_ave = mean(RSG_NMV_ave)
)


###
# Figure F.1
###

# Distributions for RSG
# Mean and variance of leaders' ticket purchases in the GroupExperiment rows,
# by gender. The means are drawn as dashed vertical lines in the plot below.
leadertixbygenderyeslvrrsg <- ddply(
  subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment"),
  c("gender"), summarise,
  ticket_ave = mean(boughtTickets)
)
# The variances are very large because it's large numbers squared
rsgvarbygender <- ddply(
  subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment"),
  c("gender"), summarise,
  ticket_var = var(boughtTickets)
)

# Density of leaders' GroupExperiment ticket purchases by gender, with a rug
# of the individual observations. The rug now uses the same GroupExperiment
# subset as the density and the means; it previously drew LotteryExperiment
# rows, which did not match the "Tickets Bought in RSG" axis.
# The "2.5 tickets" annotation is hard-coded, not computed from the data.
ggplot(
  subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment"),
  aes(x = boughtTickets, color = gender)
) +
  geom_density() +
  geom_rug(
    data = subset(sessions, isleader == 1 & Stepgroup.Label == "GroupExperiment"),
    aes(x = boughtTickets, color = gender, y = 0),
    position = position_jitter(height = 0)
  ) +
  geom_vline(
    data = leadertixbygenderyeslvrrsg,
    aes(xintercept = ticket_ave, color = gender),
    linetype = "dashed"
  ) +
  labs(color = '') +
  scale_color_manual(labels = c("women", "men"), values = c("black", "grey55")) +
  xlab("Tickets Bought in RSG") +
  ylab("Density") +
  theme(axis.title = element_text(size = 14), legend.text = element_text(size = 14)) +
  annotate("text", x = c(300, 310), y = c(0.0014, 0.0013), label = c("Difference =", "2.5 tickets"), size = 5)
#		annotate("text",x=c(468,468), y=c(0.0004,0.0003), label=c("Var men/women =","120168 / 107340"),size=5)


###
# Table F.1
###

# Logits of candidacy, leadership and winning on gender interacted with the
# RSG measures. Models ending in 1 use RSG_tickets_ave_100; models ending in 2
# use RSG_NMV_ave_100. The winner models are restricted to candidates in
# contested elections.

# Candidacy
rsgcand1 <- glm(
  iscandidate ~ female * RSG_tickets_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  family = binomial
)
rsgcand2 <- glm(
  iscandidate ~ female * RSG_NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  family = binomial
)

# Leadership
rsglead1 <- glm(
  isleader ~ female * RSG_tickets_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  family = binomial
)
rsglead2 <- glm(
  isleader ~ female * RSG_NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"
  ),
  family = binomial
)

# Winning
rsgwin1 <- glm(
  iswinningcandidate ~ female * RSG_tickets_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  ),
  family = binomial
)
rsgwin2 <- glm(
  iswinningcandidate ~ female * RSG_NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  ),
  family = binomial
)

# Within each outcome the NMV model (…2) is listed before the tickets model
# (…1); the covariate labels are written for that ordering.
stargazer(
  rsglead2, rsglead1,
  rsgcand2, rsgcand1,
  rsgwin2, rsgwin1,
  type = "latex",
  title = "Effect of NMVW on Leadership and Candidacy, by Gender",
  label = "tab:leadcandlogitrsg",
  covariate.labels = c(
    "Female",
    "RSG Nash NMVW",
    "Female X RSG Nash NMVW",
    "Ave. RSG Tickets",
    "Female X Ave RSG Tick.",
    "Constant"
  ),
  dep.var.labels = c("Is Leader", "Is Candidate", "Is Winner"),
  keep.stat = c("n")
)

###
# Figure F.2
###

# Interaction plots of RSG_NMV_ave_100 by gender for the three RSG logits.
# Each plot is written to PDF immediately after it is built; the plot object
# is passed to ggsave() explicitly (it is also the most recently built plot).

# Leadership
leadplotrsg <- interact_plot(
  rsglead2,
  pred = RSG_NMV_ave_100,
  modx = female,
  interval = TRUE,
  x.label = "RSG Average Nash NMVW",
  y.label = "Pr. of Becoming Leader",
  modx.labels = c("men", "women"),
  legend.main = "Leadership",
  rug = TRUE
) +
  scale_y_continuous(limits = c(0, 0.75))
ggsave(
  "~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/leadrsg.pdf",
  plot = leadplotrsg
)

# Candidacy
candplotrsg <- interact_plot(
  rsgcand2,
  pred = RSG_NMV_ave_100,
  modx = female,
  interval = TRUE,
  x.label = "RSG Average Nash NMVW",
  y.label = "Pr. of Candidacy",
  modx.labels = c("men", "women"),
  legend.main = "Candidacy",
  rug = TRUE
) +
  scale_y_continuous(limits = c(0, 1))
ggsave(
  "~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/candrsg.pdf",
  plot = candplotrsg
)

# Winning
winplotrsg <- interact_plot(
  rsgwin2,
  pred = RSG_NMV_ave_100,
  modx = female,
  interval = TRUE,
  x.label = "RSG Average Nash NMVW",
  y.label = "Pr. of Winning",
  modx.labels = c("men", "women"),
  legend.main = "Winning Candidacy",
  rug = TRUE
) +
  scale_y_continuous(limits = c(0, 1))
ggsave(
  "~/Dropbox/Selection Experiments Harvard/Drafts and Presentations/winrsg.pdf",
  plot = winplotrsg
)


###
# Table F.2
###

# OLS of campaign-message length (candidateMsg_char) on gender interacted with
# each RSG measure, using candidate rows from ElectionsandContest.
# Note: these assignments overwrite the camplength1/camplength2 objects fitted
# for the ICG version of this table earlier in the script.
camplength1 <- lm(
  candidateMsg_char ~ female * RSG_tickets_ave_100,
  data = subset(
    sessions,
    iscandidate == 1 & Stepgroup.Label == "ElectionsandContest"
  )
)
camplength2 <- lm(
  candidateMsg_char ~ female * RSG_NMV_ave_100,
  data = subset(
    sessions,
    iscandidate == 1 & Stepgroup.Label == "ElectionsandContest"
  )
)

# The label carries the "rsg" suffix used by the other RSG tables in this
# appendix, so it no longer collides with the ICG table's
# "tab:camplength_regression" label.
# NOTE(review): update any \ref in the manuscript that pointed at this table.
stargazer(
  camplength1, camplength2,
  type = "latex",
  title = "Length of Campaign Messages, by Effort and Gender",
  label = "tab:camplength_regressionrsg",
  covariate.labels = c(
    "Female",
    "Ave. RSG Tickets",
    "Female x Ave. RSG Tickets ",
    "Ave. RSG NMV",
    "Female x Ave. RSG NMV"
  ),
  dep.var.labels = "Words in Campaign Message",
  notes = "Averages given in 100s of tickets."
)

###
# Table F.3
###

# Logits of isleader on RSG_NMV_ave_100 * female for candidates in contested
# elections, controlling for message length (m1rsg), the message-content
# indicators (m2brsg), or both (m3brsg).
# msg_team must already exist in `sessions`; it is created earlier in this
# script.
m1rsg <- glm(
  isleader ~ RSG_NMV_ave_100 * female + candidateMsg_char,
  data = subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  family = "binomial"
)

m2brsg <- glm(
  isleader ~ RSG_NMV_ave_100 * female +
    msg_bidhigh + msg_bidlow + msg_humor + msg_pastbad + msg_skill +
    msg_speed + msg_strategy + msg_team + msg_trackrecord,
  data = subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  family = "binomial"
)

m3brsg <- glm(
  isleader ~ RSG_NMV_ave_100 * female + candidateMsg_char +
    msg_bidhigh + msg_bidlow + msg_humor + msg_pastbad + msg_skill +
    msg_speed + msg_strategy + msg_team + msg_trackrecord,
  data = subset(
    sessions,
    Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 &
      ranunopposed == 0
  ),
  family = "binomial"
)

stargazer(
  m1rsg, m2brsg, m3brsg,
  type = "latex",
  title = "Effect of Gender and RSG NMVW on Winning, Controlling for Campaign Messages",
  label = "tab:camp_regressionrsg",
  covariate.labels = c(
    "RSG NMVW", "Female", "Length",
    "Bid High", "Bid Low", "Humor", "Critique", "Skill",
    "Speed", "Strategy", "Team", "Track Record",
    "RSG NMVW x Female"
  )
)






########################################
########	F2. Risk Aversion	########
########################################

### Figure F.3 is reproduced from the appendix of Chaudoin, Hummel and Park "The Election Effect" ISQ.

###
# These are data setup lines of code that create variables used in the risk aversion analysis
###

# For each participant, compute their mean ticket purchase within three slices
# of the LotteryExperiment rounds:
#   icglvr_mean  - rounds with prize < 1000 (the ICG LVRs)
#   icg1225_mean - rounds with prize strictly between 1224 and 1236
#   icg1605_mean - rounds with prize strictly between 1604 and 1616
# Each intermediate table keeps one row per participant-round in the slice,
# with the participant-level mean repeated on every row.
test <- sessions %>%
  subset(Stepgroup.Label == "LotteryExperiment" & prize < 1000) %>%
  group_by(participantfe_string) %>%
  dplyr::mutate(icglvr_mean = mean(boughtTickets, na.rm = TRUE)) %>%
  ungroup()
test2 <- sessions %>%
  subset(Stepgroup.Label == "LotteryExperiment" & prize < 1236 & prize > 1224) %>%
  group_by(participantfe_string) %>%
  dplyr::mutate(icg1225_mean = mean(boughtTickets, na.rm = TRUE)) %>%
  ungroup()
test3 <- sessions %>%
  subset(Stepgroup.Label == "LotteryExperiment" & prize < 1616 & prize > 1604) %>%
  group_by(participantfe_string) %>%
  dplyr::mutate(icg1605_mean = mean(boughtTickets, na.rm = TRUE)) %>%
  ungroup()

testdf <- as.data.frame(test)
test2df <- as.data.frame(test2)
test3df <- as.data.frame(test3)

#testdf %>% subset(participantfe_string == "s.207.P.1" & Stepgroup.Label=="LotteryExperiment")
#test2df %>% subset(participantfe_string == "s.207.P.1" & Stepgroup.Label=="LotteryExperiment")
#sessions %>% subset(participantfe_string == "s.207.P.1" & Stepgroup.Label=="LotteryExperiment")

# Merging the three test dataframes into sessions. These are left joins on
# participant, loop and part, so rows outside a slice get NA for that mean.
sessions <- merge(
  x = sessions,
  y = testdf[, c("participantfe_string", "Stepgroup.Loop", "Stepgroup.Label", "icglvr_mean")],
  by = c("participantfe_string", "Stepgroup.Loop", "Stepgroup.Label"),
  all.x = TRUE
)
sessions <- merge(
  x = sessions,
  y = test2df[, c("participantfe_string", "Stepgroup.Loop", "Stepgroup.Label", "icg1225_mean")],
  by = c("participantfe_string", "Stepgroup.Loop", "Stepgroup.Label"),
  all.x = TRUE
)
sessions <- merge(
  x = sessions,
  y = test3df[, c("participantfe_string", "Stepgroup.Loop", "Stepgroup.Label", "icg1605_mean")],
  by = c("participantfe_string", "Stepgroup.Loop", "Stepgroup.Label"),
  all.x = TRUE
)

# Spread each participant-level mean to all of that participant's rows by
# taking the within-participant maximum of the (otherwise NA) merged column.
# NOTE(review): for a participant with no non-missing value, max(..., na.rm =
# TRUE) returns -Inf with a warning -- confirm that no such participants exist.
sessions <- sessions %>%
  group_by(participantfe_string) %>%
  dplyr::mutate(
    icglvr_mean = max(icglvr_mean, na.rm = TRUE),
    icg1225_mean = max(icg1225_mean, na.rm = TRUE),
    icg1605_mean = max(icg1605_mean, na.rm = TRUE)
  ) %>%
  as.data.frame()

# Proportional changes in mean purchases between slices.
sessions$pctincrease_ra <- with(sessions, (icg1225_mean - icglvr_mean) / icglvr_mean)
sessions$pctincrease_ra2 <- with(sessions, (icg1605_mean - icglvr_mean) / icglvr_mean)
sessions$pctincrease_ra3 <- with(sessions, (icg1605_mean - icg1225_mean) / icg1225_mean)


###
# Table F.4
###

# Risk aversion measure used in the table: the nominal difference between a
# participant's average tickets in the 1225/1235 rounds and in the ICG LVRs.
sessions$nomincrease_ra <- sessions$icg1225_mean - sessions$icglvr_mean

# Outcome: isleader. Sample: ElectionsandContest rows with ranunopposed == 0.
win1nomra <- glm(
  isleader ~ female * nomincrease_ra + female * NMV_ave_100,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest" & ranunopposed == 0),
  family = "binomial"
)
win2nomra <- glm(
  isleader ~ female * nomincrease_ra + female * ICG_tickets_ave_100,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest" & ranunopposed == 0),
  family = "binomial"
)

# Outcome: iscandidate. Sample: all ElectionsandContest rows.
cand1nomra <- glm(
  iscandidate ~ female * nomincrease_ra + female * NMV_ave_100,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest"),
  family = "binomial"
)
cand2nomra <- glm(
  iscandidate ~ female * nomincrease_ra + female * ICG_tickets_ave_100,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest"),
  family = "binomial"
)

# Outcome: iswinningcandidate. Sample: candidates with non-missing
# boughtTickets in ElectionsandContest rows with ranunopposed == 0.
iswinner1nomra <- glm(
  iswinningcandidate ~ female * nomincrease_ra + female * NMV_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  ),
  family = binomial
)
iswinner2nomra <- glm(
  iswinningcandidate ~ female * nomincrease_ra + female * ICG_tickets_ave_100,
  data = subset(
    sessions,
    !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" &
      iscandidate == 1 & ranunopposed == 0
  ),
  family = binomial
)

stargazer(
  win1nomra, win2nomra, cand1nomra, cand2nomra, iswinner1nomra, iswinner2nomra,
  type = "latex",
  title = "Effect of NMVW and risk aversion measure on Leadership and Candidacy, by Gender",
  label = "tab:leadcandlogitranom",
  covariate.labels = c(
    "Female", "RA Measure", "Nash NMVW", "Ave. ICG Tickets",
    "Female X RA Measure", "Female X NMVW", "Female X Ave ICG Tick.",
    "Constant"
  ),
  dep.var.labels = c("Is Leader", "Is Candidate", "Is Winner"),
  keep.stat = c("n")
)




####################################################
########	F3. Confidence and Learning		########
####################################################

###
# Table F.5: Difference in ICG payoffs by gender
###
# Every summary below uses ElectionsandContest rows with non-missing
# boughtTickets and reports a mean of ICG_payoff_ave or ICG_win_ave.

# Leaders vs not leaders, by gender
ddply(
  subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & isleader == 1),
  c("gender"), summarise,
  ICG_payoff_ave = mean(ICG_payoff_ave, na.rm = TRUE)
)
ddply(
  subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & isleader == 0),
  c("gender"), summarise,
  ICG_payoff_ave = mean(ICG_payoff_ave, na.rm = TRUE)
)
ddply(
  subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & isleader == 1),
  c("gender"), summarise,
  ICG_win_ave = mean(ICG_win_ave, na.rm = TRUE)
)
ddply(
  subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & isleader == 0),
  c("gender"), summarise,
  ICG_win_ave = mean(ICG_win_ave, na.rm = TRUE)
)

# Candidates vs not candidates, by candidacy status and gender
ddply(
  subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"),
  c("iscandidate", "gender"), summarise,
  ICG_payoff_ave = mean(ICG_payoff_ave, na.rm = TRUE)
)
ddply(
  subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest"),
  c("iscandidate", "gender"), summarise,
  ICG_win_ave = mean(ICG_win_ave, na.rm = TRUE)
)

# Winners vs losers among candidates who did not run unopposed, by gender
ddply(
  subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0),
  c("isleader", "gender"), summarise,
  ICG_payoff_ave = mean(ICG_payoff_ave, na.rm = TRUE)
)
ddply(
  subset(sessions, !is.na(boughtTickets) & Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0),
  c("isleader", "gender"), summarise,
  ICG_win_ave = mean(ICG_win_ave, na.rm = TRUE)
)


###
# Table F.6: Effect of ICG payoff on candidacy, by gender
###

# Logits of iscandidate on female x ICG_payoff_ave over all
# ElectionsandContest rows; cand2 and cand3 add a second interaction with
# ICG_tickets_ave_100 and NMV_ave_100 respectively.
cand1 <- glm(
  iscandidate ~ female * ICG_payoff_ave,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest"),
  family = "binomial"
)
cand2 <- glm(
  iscandidate ~ female * ICG_payoff_ave + female * ICG_tickets_ave_100,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest"),
  family = "binomial"
)
cand3 <- glm(
  iscandidate ~ female * ICG_payoff_ave + female * NMV_ave_100,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest"),
  family = "binomial"
)

stargazer(
  cand1, cand2, cand3,
  type = "latex",
  title = "Differences Between Candidates and Not Candidates, by Gender",
  label = "tab:selfselection_regression_confidence",
  covariate.labels = c(
    "Female", "Ave. ICG Payoff", "Ave. ICG Tickets", "Ave. ICG NMV",
    "Female x Ave. ICG Payoff", "Female x Ave. ICG Tickets ",
    "Female x ICG NMV Ave."
  ),
  dep.var.labels = "Candidate",
  notes = "Averages given in 100s of tickets."
)

###
# Table F.7: Effect of ICG payoff on electoral success, by gender
###

# Logits of isleader on female x ICG_payoff_ave among candidates in
# ElectionsandContest rows who did not run unopposed; win2 and win3 add a
# second interaction with ICG_tickets_ave_100 and NMV_ave_100 respectively.
win1 <- glm(
  isleader ~ female * ICG_payoff_ave,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0),
  family = "binomial"
)
win2 <- glm(
  isleader ~ female * ICG_payoff_ave + female * ICG_tickets_ave_100,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0),
  family = "binomial"
)
win3 <- glm(
  isleader ~ female * ICG_payoff_ave + female * NMV_ave_100,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0),
  family = "binomial"
)

# The dependent variable here is isleader (winning the election), not
# candidacy, so the column header is "Electoral Success" -- the same header
# Table F.9 uses for this outcome -- rather than "Candidate".
stargazer(
  win1, win2, win3,
  type = "latex",
  title = "Differences Between Winning and Losing Candidates, by Gender",
  label = "tab:groupselection_regression_confidence",
  covariate.labels = c(
    "Female", "Ave. ICG Payoff", "Ave. ICG Tickets", "Ave. ICG NMV",
    "Female x Ave. ICG Payoff", "Female x Ave. ICG Tickets ",
    "Female x ICG NMV Ave."
  ),
  dep.var.labels = "Electoral Success",
  notes = "Averages given in 100s of tickets."
)

###
# Table F.8: Effect of lagged payoffs
###

# OLS of boughtTickets on female x payoff_lag for LotteryExperiment rows with
# isleader == 1, first without and then with factor(payofflevel) dummies.
ticksnofe <- lm(
  boughtTickets ~ female * payoff_lag,
  data = subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment")
)
summary(ticksnofe)

ticks <- lm(
  boughtTickets ~ female * payoff_lag + factor(payofflevel),
  data = subset(sessions, isleader == 1 & Stepgroup.Label == "LotteryExperiment")
)
summary(ticks)

# `keep` restricts the printed rows to coefficients whose names match these
# patterns, which leaves the factor(payofflevel) dummies out of the table.
stargazer(
  ticksnofe, ticks,
  type = "latex",
  title = "Effect of Lagged Payoffs on Ticket Purchases, by Gender",
  label = "tab:con_laggedpayoff",
  keep = c("female", "payoff_lag", "winner_lag", "loser_lag"),
  covariate.labels = c("Female", "Payoff (lagged)", "Female x Payoff (lagged)")
)
# Unused alternative column headers:
#   dep.var.labels = c("No prize FE", "w/ prize FE")


###
# Table F.9: Effect of electoral wins/losses
###

# Logit of iscandidate on female x winner_lag and female x loser_lag over all
# ElectionsandContest rows.
cand <- glm(
  iscandidate ~ female * winner_lag + female * loser_lag,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest"),
  family = "binomial"
)
# Same specification with isleader as the outcome, among candidates who did
# not run unopposed.
win <- glm(
  isleader ~ female * winner_lag + female * loser_lag,
  data = subset(sessions, Stepgroup.Label == "ElectionsandContest" & iscandidate == 1 & ranunopposed == 0),
  family = "binomial"
)

# This table previously reused the title and LaTeX label of Table F.7
# ("tab:groupselection_regression_confidence"), which makes the label multiply
# defined and \ref{} ambiguous. It now has its own title and label.
# NOTE(review): update any \ref{} in the manuscript that was meant to point at
# this table.
stargazer(
  cand, win,
  type = "latex",
  title = "Effect of Lagged Electoral Wins and Losses on Candidacy and Electoral Success, by Gender",
  label = "tab:con_electoralwinloss",
  keep = c("female", "payoff_lag", "winner_lag", "loser_lag"),
  covariate.labels = c(
    "Female", "Winning Candidate (lagged)", "Losing Candidate (lagged)",
    "Female x Winner (lagged)", "Female x Loser (Lagged)"
  ),
  dep.var.labels = c("Candidate", "Electoral Success")
)


###
# Figure F.4: Learning over time in the ICG
###

# One smoothed curve of effort_pctprize against round (Stepgroup.Loop) for
# each gender x eventual-leader group in the LotteryExperiment rounds. Each
# layer supplies its own data subset and a constant colour label, which
# becomes that group's legend entry.
ggplot(subset(sessions, Stepgroup.Label == "LotteryExperiment")) +
  geom_smooth(
    data = subset(sessions, Stepgroup.Label == "LotteryExperiment" & female == 1 & everelecleader == 1),
    aes(x = Stepgroup.Loop, y = effort_pctprize, col = "Female, Eventual Leader"),
    span = 0.5, se = FALSE
  ) +
  geom_smooth(
    data = subset(sessions, Stepgroup.Label == "LotteryExperiment" & female == 0 & everelecleader == 1),
    aes(x = Stepgroup.Loop, y = effort_pctprize, col = "Male, Eventual Leader"),
    span = 0.5, se = FALSE
  ) +
  geom_smooth(
    data = subset(sessions, Stepgroup.Label == "LotteryExperiment" & female == 1 & everelecleader == 0),
    aes(x = Stepgroup.Loop, y = effort_pctprize, col = "Female, Never Leader"),
    span = 0.5, se = FALSE
  ) +
  geom_smooth(
    data = subset(sessions, Stepgroup.Label == "LotteryExperiment" & female == 0 & everelecleader == 0),
    aes(x = Stepgroup.Loop, y = effort_pctprize, col = "Male, Never Leader"),
    span = 0.5, se = FALSE
  ) +
  labs(x = "Round", y = "Tickets Purchased / Prize") +
  theme(legend.title = element_blank()) +
  scale_x_continuous(breaks = seq(0, 12, by = 1))


###
# Table F.10: Learning over time, by gender and eventual leadership/candidacy
###

# Question: did men and women learn differently in the ICG, and does that
# differ between eventual and non-eventual leaders (or candidates)?
# Authors' reading of the results: no. Among eventual leaders, men and women
# "learned" the same, and likewise among non-eventual leaders.
#
# Each model regresses boughtTickets on female, round (Stepgroup.Loop) and
# their interaction, using LotteryExperiment rows with isleader == 1.

# Eventual leaders
icg.learn.evlead <- lm(
  boughtTickets ~ female + female:Stepgroup.Loop + Stepgroup.Loop,
  data = subset(sessions, isleader == 1 & everelecleader == 1 & Stepgroup.Label == "LotteryExperiment")
)
summary(icg.learn.evlead)

# Never leaders
icg.learn.nevlead <- lm(
  boughtTickets ~ female + female:Stepgroup.Loop + Stepgroup.Loop,
  data = subset(sessions, isleader == 1 & everelecleader == 0 & Stepgroup.Label == "LotteryExperiment")
)
summary(icg.learn.nevlead)

# Eventual candidates
icg.learn.evcand <- lm(
  boughtTickets ~ female + female:Stepgroup.Loop + Stepgroup.Loop,
  data = subset(sessions, isleader == 1 & evercandidate == 1 & Stepgroup.Label == "LotteryExperiment")
)
summary(icg.learn.evcand)

# Never candidates
icg.learn.ncand <- lm(
  boughtTickets ~ female + female:Stepgroup.Loop + Stepgroup.Loop,
  data = subset(sessions, isleader == 1 & evercandidate == 0 & Stepgroup.Label == "LotteryExperiment")
)
summary(icg.learn.ncand)

stargazer(
  icg.learn.evlead, icg.learn.nevlead, icg.learn.evcand, icg.learn.ncand,
  type = "latex",
  title = "Learning over time, by gender and eventual leadership/candidacy",
  label = "tab:icglearn",
  covariate.labels = c("Female", "Round number", "Female X Round number", "Constant"),
  keep.stat = c("n")
)




############################################################
########	F4. The Election Effect by gender		########
############################################################

# The code for the regressions referenced in this appendix is here.
#
# Each model regresses boughtTickets on ispart3 with factor(payofflevel) and
# participantfe_factor terms, using rows with isleader == 1 and
# everelecleader == 1. m1 uses female == 0, m2 uses female == 1, and m3 pools
# both with an ispart3 x female interaction and no intercept (-1).
# NOTE(review): the filter compares Stepgroup.Label against
# "GroupExperiement" (spelled this way); confirm this matches the label as it
# appears in the data, since a non-matching string would exclude nothing.
m1 <- lm(
  boughtTickets ~ ispart3 + factor(payofflevel) + participantfe_factor,
  data = subset(sessions, isleader == 1 & female == 0 & everelecleader == 1 & Stepgroup.Label != "GroupExperiement")
)
summary(m1)
m2 <- lm(
  boughtTickets ~ ispart3 + factor(payofflevel) + participantfe_factor,
  data = subset(sessions, isleader == 1 & female == 1 & everelecleader == 1 & Stepgroup.Label != "GroupExperiement")
)
summary(m2)

m3 <- lm(
  boughtTickets ~ ispart3 * female + factor(payofflevel) + participantfe_factor - 1,
  data = subset(sessions, isleader == 1 & everelecleader == 1 & Stepgroup.Label != "GroupExperiement")
)
summary(m3)

# The table has three model columns, so each add.lines row needs the row
# label plus three entries. All three specifications include the prize and
# participant terms, so every column is marked "Y" (previously the m3 column
# was left blank).
stargazer(
  m1, m2, m3,
  type = "latex",
  title = "Within-Participant Changes in Tickets Bought for Eventual Leaders, by Gender",
  label = "tab:election_regression",
  keep = c("female", "ispart3", "Constant"),
  dep.var.labels = "Tickets Bought",
  add.lines = list(
    c("Prize FEs?", "Y", "Y", "Y"),
    c("Participant FEs?", "Y", "Y", "Y")
  )
)





############################################################
########	F5. Re-election effects					########
############################################################

# The code for the regressions referenced in this appendix is here.
#
# OLS of boughtTickets on isoddround with factor(payofflevel) dummies, using
# rows with isleader == 1; m2 adds the interaction with female. Note that
# these assignments overwrite the m1 and m2 objects from section F4.
m1 <- lm(
  boughtTickets ~ isoddround + factor(payofflevel),
  data = subset(sessions, isleader == 1 & Stepgroup.Label != "GroupExperiement")
)
summary(m1)

m2 <- lm(
  boughtTickets ~ isoddround * female + factor(payofflevel),
  data = subset(sessions, isleader == 1 & Stepgroup.Label != "GroupExperiement")
)
summary(m2)

stargazer(
  m1, m2,
  type = "latex",
  title = "Reelection effects, by Gender",
  label = "tab:reelection_regression",
  keep = c("isoddround", "female"),
  covariate.labels = c("Reelection round", "Female", "Reelection x Female"),
  dep.var.labels = "Tickets Bought",
  add.lines = list(c("Prize FEs?", "Y", "Y"))
)






#############################################################################################################################################################################################################################
#############################################################################################################################################################################################################################
#############################################################################################################################################################################################################################
#############################################################################################################################################################################################################################
#############################################################################################################################################################################################################################
#############################################################################################################################################################################################################################












